library(drake)
library(tidyverse)
# Load the `data`, `data_trans`, and `data_mice` targets via drake's loadd()
# so they are available by name in the code below.
loadd(data, data_trans, data_mice)

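Look at NA values in the raw data: first count the non-missing values in each column, then count the rows whose value is zero or missing for selected per-capita variables.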

# Count the non-missing (non-NA) entries in each column of the raw data.
map_int(data, function(column) sum(!is.na(column)))
##                               iso3c                        n_amr_events                  n_amr_first_events                  health_expend_perc 
##                                 190                                  59                                  59                                 183 
##              migrant_pop_per_capita                          population                       ab_export_bin                      english_spoken 
##                                 190                                 190                                 162                                 190 
##               human_consumption_ddd    livestock_consumption_kg_per_pcu                       livestock_pcu                ab_export_per_capita 
##                                  68                                 164                                 164                                  87 
##                ab_import_per_capita livestock_consumption_kg_per_capita                      gdp_per_capita         tourism_outbound_per_capita 
##                                 161                                  31                                 190                                  44 
##          tourism_inbound_per_capita                 pubcrawl_per_capita          promed_mentions_per_capita 
##                                 111                                 180                                 189
# Number of rows where pubcrawl_per_capita is either missing or exactly zero.
nrow(
  dplyr::filter(
    data,
    is.na(pubcrawl_per_capita) | pubcrawl_per_capita == 0
  )
)
## [1] 33
# Number of rows where promed_mentions_per_capita is either missing or
# exactly zero.
nrow(
  dplyr::filter(
    data,
    is.na(promed_mentions_per_capita) | promed_mentions_per_capita == 0
  )
)
## [1] 1
# Number of rows where ab_export_per_capita is either missing or exactly zero.
nrow(
  dplyr::filter(
    data,
    is.na(ab_export_per_capita) | ab_export_per_capita == 0
  )
)
## [1] 103

Look at NA values and variable distributions after NA processing

# Count the non-missing (non-NA) entries in each column of the transformed data.
map_int(data_trans, function(column) sum(!is.na(column)))
##                                  iso3c                           n_amr_events                     health_expend_perc              ln_migrant_pop_per_capita 
##                                    190                                    190                                    183                                    190 
##                          ln_population                          ab_export_bin                         english_spoken                  human_consumption_ddd 
##                                    190                                    190                                    190                                     68 
##                       ln_livestock_pcu                ln_ab_export_per_capita                ln_ab_import_per_capita ln_livestock_consumption_kg_per_capita 
##                                    164                                    190                                    161                                     31 
##                      ln_gdp_per_capita         ln_tourism_outbound_per_capita          ln_tourism_inbound_per_capita                 ln_pubcrawl_per_capita 
##                                    190                                     44                                    111                                    190 
##          ln_promed_mentions_per_capita 
##                                    190
# Confirm no infinite values: flag, per column, whether any entry is infinite.
map_lgl(data_trans, function(column) any(is.infinite(column)))
##                                  iso3c                           n_amr_events                     health_expend_perc              ln_migrant_pop_per_capita 
##                                  FALSE                                  FALSE                                  FALSE                                  FALSE 
##                          ln_population                          ab_export_bin                         english_spoken                  human_consumption_ddd 
##                                  FALSE                                  FALSE                                  FALSE                                  FALSE 
##                       ln_livestock_pcu                ln_ab_export_per_capita                ln_ab_import_per_capita ln_livestock_consumption_kg_per_capita 
##                                  FALSE                                  FALSE                                  FALSE                                  FALSE 
##                      ln_gdp_per_capita         ln_tourism_outbound_per_capita          ln_tourism_inbound_per_capita                 ln_pubcrawl_per_capita 
##                                  FALSE                                  FALSE                                  FALSE                                  FALSE 
##          ln_promed_mentions_per_capita 
##                                  FALSE
# Histogram of each transformed variable, one facet per variable with free
# axes. The identifier (iso3c), n_amr_events, and english_spoken are dropped
# before plotting.
data_trans %>%
  # Qualified like the other select() calls in this file, so a masking
  # select() from another attached package cannot be picked up by accident.
  dplyr::select(-iso3c, -n_amr_events, -english_spoken) %>%
  # pivot_longer() replaces the superseded gather(); the key/value column
  # names match what gather() produced by default.
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  # bins = 30 is the value geom_histogram() falls back to; stating it keeps
  # the same plot without the "pick better value" message.
  geom_histogram(bins = 30) +
  facet_wrap(~key, scales = "free")

# Spearman correlation chart (with histograms) for the transformed data,
# after dropping the columns listed in the select() call.
PerformanceAnalytics::chart.Correlation(
  dplyr::select(
    data_trans,
    -iso3c,
    -ln_livestock_pcu,
    -ln_ab_import_per_capita,
    -ab_export_bin,
    -english_spoken
  ),
  histogram = TRUE,
  pch = 19,
  method = "spearman"
)

Look at imputed data

# Convergence check for the imputation object.
# On convergence, the different streams should be freely intermingled with one
# another, without showing any definite trends. Convergence is diagnosed when
# the variance between different sequences is no larger than the variance
# within each individual sequence.
plot(data_mice)

# NOTE(review): show_imputes() is not defined in this section; presumably it
# displays the imputed values against the pre-imputation data — confirm
# against its definition. It is passed the `m` element of `data_mice` and
# `data_trans` as `raw`.
show_imputes(data_mice, m = data_mice[["m"]], raw = data_trans)

# Extract a completed dataset from the imputation object and chart its
# Spearman correlations (with histograms), excluding the iso3c identifier.
#
# complete() is namespace-qualified because library(tidyverse) attaches tidyr,
# which also exports complete(); the qualified call always reaches mice's
# implementation regardless of attach order. action = 1L is mice's default
# (the first imputed dataset) and is spelled out so it is clear that only one
# of the imputations is shown here.
imp <- mice::complete(data_mice, action = 1L)
imp %>%
  dplyr::select(-iso3c) %>%
  PerformanceAnalytics::chart.Correlation(
    histogram = TRUE,
    pch = 19,
    method = "spearman"
  )